# Lookup from two-letter postal abbreviation to census region. The built-in
# state vectors do not contain the District of Columbia, so it is appended by
# hand and placed in the South; `region` stays a factor with the built-in
# levels.
regions <- tibble(
  state_abb = c(state.abb, "DC"),
  region = factor(
    c(as.character(state.region), "South"),
    levels = levels(state.region)
  )
)
# Monthly "days on Zillow" per state, tagged with a postal abbreviation and a
# census region.
days_on_zillow <- readxl::read_excel(
  here::here("data", "prepped", "days_on_zillow_state.xlsx")
) %>%
  mutate(
    date = as.Date(date),
    # RegionName is matched against the lower-cased built-in state names;
    # anything that does not match (DC included) comes back as NA.
    state_abb = state.abb[match(RegionName, tolower(state.name))],
    # DC is absent from the built-in state vectors, so patch it in by hand.
    state_abb = replace(state_abb, RegionName == "district of columbia", "DC")
  ) %>%
  # Name the key explicitly so that a column shared by accident can never
  # silently become part of the join. Rows whose state_abb is still NA have no
  # counterpart in `regions` and are dropped by the inner join.
  inner_join(regions, by = "state_abb")
# Show the last rows to check the abbreviation and region columns.
days_on_zillow %>% tail()
## # A tibble: 6 x 8
##    year month date        days SizeRank RegionName      state_abb region   
##   <dbl> <dbl> <date>     <dbl>    <dbl> <chr>           <chr>     <fct>    
## 1  2019     7 2019-07-01    62       46 south dakota    SD        North Ce~
## 2  2019     7 2019-07-01    75       47 alaska          AK        West     
## 3  2019     7 2019-07-01    79       48 north dakota    ND        North Ce~
## 4  2019     7 2019-07-01    71       49 vermont         VT        Northeast
## 5  2019     7 2019-07-01    61       50 district of co~ DC        South    
## 6  2019     7 2019-07-01    58       51 wyoming         WY        West
# Column-wise overview (ranges, NA counts, rows per region).
days_on_zillow %>% summary()
##       year          month             date                 days      
##  Min.   :2010   Min.   : 1.000   Min.   :2010-01-01   Min.   : 42.0  
##  1st Qu.:2012   1st Qu.: 3.000   1st Qu.:2012-05-01   1st Qu.: 81.0  
##  Median :2014   Median : 6.000   Median :2014-10-01   Median :102.0  
##  Mean   :2014   Mean   : 6.348   Mean   :2014-10-01   Mean   :103.5  
##  3rd Qu.:2017   3rd Qu.: 9.000   3rd Qu.:2017-03-01   3rd Qu.:122.0  
##  Max.   :2019   Max.   :12.000   Max.   :2019-07-01   Max.   :263.0  
##                                                       NA's   :19     
##     SizeRank   RegionName         state_abb                   region    
##  Min.   : 1   Length:5865        Length:5865        Northeast    :1035  
##  1st Qu.:13   Class :character   Class :character   South        :1955  
##  Median :26   Mode  :character   Mode  :character   North Central:1380  
##  Mean   :26                                         West         :1495  
##  3rd Qu.:39                                                             
##  Max.   :51                                                             
## 
# Number of distinct months present in the data.
n_distinct(days_on_zillow[["date"]])
## [1] 115
# Yearly distribution of days on Zillow, one panel per census region, rendered
# as an interactive plotly widget.
region_boxplot <- days_on_zillow %>%
  ggplot(mapping = aes(x = year, y = days, group = year)) +
  facet_wrap(facets = ~region) +
  geom_boxplot()
ggplotly(p = region_boxplot)
## Warning: Removed 19 rows containing non-finite values (stat_boxplot).
# The same yearly boxplots with all regions pooled into one panel.
days_on_zillow %>%
  ggplot(mapping = aes(x = year, y = days, group = year)) +
  geom_boxplot()
## Warning: Removed 19 rows containing non-finite values (stat_boxplot).

# One line per state over time, one panel per census region, rendered as an
# interactive plotly widget.
p1 <- days_on_zillow %>%
  ggplot(mapping = aes(x = date, y = days, colour = state_abb)) +
  facet_wrap(facets = ~region) +
  geom_line()
ggplotly(p = p1)
# The same per-state lines drawn in a single static panel.
days_on_zillow %>%
  ggplot(mapping = aes(x = date, y = days, colour = state_abb)) +
  geom_line()
## Warning: Removed 19 rows containing missing values (geom_path).

# Attach polygon outlines to every state-month row by matching `RegionName`
# against the map data's `region` column (both hold lower-case state names).
# NOTE(review): the "state" map presumably lacks Alaska and Hawaii, in which
# case the inner join drops them -- confirm this is intended.
days_on_zillow_geo_data <- inner_join(
  x = days_on_zillow,
  y = map_data("state"),
  by = c(RegionName = "region")
)
# Show the last rows of the joined data to check the polygon columns.
days_on_zillow_geo_data %>% tail()
## # A tibble: 6 x 13
##    year month date        days SizeRank RegionName state_abb region  long
##   <dbl> <dbl> <date>     <dbl>    <dbl> <chr>      <chr>     <fct>  <dbl>
## 1  2019     7 2019-07-01    58       51 wyoming    WY        West   -106.
## 2  2019     7 2019-07-01    58       51 wyoming    WY        West   -107.
## 3  2019     7 2019-07-01    58       51 wyoming    WY        West   -107.
## 4  2019     7 2019-07-01    58       51 wyoming    WY        West   -108.
## 5  2019     7 2019-07-01    58       51 wyoming    WY        West   -109.
## 6  2019     7 2019-07-01    58       51 wyoming    WY        West   -109.
## # ... with 4 more variables: lat <dbl>, group <dbl>, order <int>,
## #   subregion <chr>
# Column-wise overview of the joined data. `summarise()` with no summary
# expressions only returns an empty one-row tibble, so use `summary()`, as is
# done for `days_on_zillow`.
summary(days_on_zillow_geo_data)
# Months that survive the join. Inside `distinct()` the column is referred to
# by its bare name, which also gives the result column the plain name `date`.
distinct(days_on_zillow_geo_data, date)
## # A tibble: 115 x 1
##    date      
##    <date>    
##  1 2010-01-01
##  2 2010-02-01
##  3 2010-03-01
##  4 2010-04-01
##  5 2010-05-01
##  6 2010-06-01
##  7 2010-07-01
##  8 2010-08-01
##  9 2010-09-01
## 10 2010-10-01
## # ... with 105 more rows
# Animated choropleth: state polygons filled by days on Zillow, stepping
# through the `date` column.
days_on_zillow_map <- ggplot(
  data = days_on_zillow_geo_data,
  aes(x = long, y = lat, fill = days, group = group)
) +
  # The border colour is a fixed layer setting. Passed to `ggplot()` it had no
  # effect, so it is set on the polygon layer where it is actually drawn.
  geom_polygon(color = "white") +
  coord_map(projection = "albers", lat0 = 39, lat1 = 45) +
  # gganimate: one animation time step per value of `date`, eased between
  # steps.
  transition_time(date) +
  ease_aes("cubic-in-out") +
  scale_fill_viridis_c(option = "plasma") +
  # `frame_time` is the time of the frame being drawn; it is rounded before
  # being shown in the title.
  labs(title = "Date: {round(frame_time, 0)}") +
  theme_map()
# Render the animation with a total duration of 100, then write the most
# recently rendered animation to disk as a GIF.
animate(plot = days_on_zillow_map, duration = 100)

anim_save(filename = "days_on_zillow.gif", animation = last_animation())